import os
import datetime
import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
mpl.rcParams['figure.figsize'] = (10, 8)
mpl.rcParams['axes.grid'] = False
from google.colab import drive
drive.mount('/content/drive')
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/datasets/Appliances_Energy_Prediction_Dataset.csv')
# slice [start:stop:step]: starting from index 5, take every 6th record to downsample the 10-minute data to hourly samples.
df = df[5::6]
date_time = pd.to_datetime(df.pop('date'), format='%Y.%m.%d %H:%M:%S')
df.head()
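As a quick sanity check (a small sketch), confirm that the subsampling produced hourly spacing:
# Most consecutive timestamps should now be exactly one hour apart.
print(date_time.diff().value_counts().head())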
Plot how a few features, such as Appliances, lights, and the outside temperature (T_out), evolve over time.
plot_cols = ['Appliances', 'lights', 'T_out']
plot_features = df[plot_cols]
plot_features.index = date_time
_ = plot_features.plot(subplots=True)
plot_features = df[plot_cols][:480]
plot_features.index = date_time[:480]
_ = plot_features.plot(subplots=True)
The histogram below shows that the distribution of Appliances is strongly asymmetric (right-skewed). To improve model performance, we will therefore work with the log of Appliances, whose distribution is much more symmetric.
df['log_Appliances'] = np.log(df.Appliances)
appliance = ["Appliances","log_Appliances"]
appliance=df[appliance]
appliance.hist(bins = 20, figsize= (12,8)) ;
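To quantify the asymmetry, a quick check with pandas' built-in skewness estimate (a small sketch; the positive skew should shrink markedly after the log transform):
# Compare skewness of the raw and log-transformed series.
print(appliance.skew())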
df.corr()['log_Appliances']
Plotting the correlation matrix to understand the dataset dynamics.
col = ['Appliances', 'lights', 'T1', 'RH_1', 'T2', 'RH_2', 'T3', 'RH_3', 'T4',
'RH_4', 'T5', 'RH_5', 'T6', 'RH_6', 'T7', 'RH_7', 'T8', 'RH_8', 'T9',
'RH_9', 'T_out', 'Press_mm_hg', 'RH_out', 'Windspeed', 'Visibility',
'Tdewpoint', 'rv1', 'rv2', 'log_Appliances']
corr=df.corr()
plt.figure(figsize = (18,18))
sns.set(font_scale=1)
sns.heatmap(corr, cbar = True, annot=True, square = True, fmt = '.2f', xticklabels=col, yticklabels=col)
plt.show();
Based on the histogram and correlation plots above, we drop 'Appliances' and continue with 'log_Appliances' for better prediction and model performance.
df = df.drop(columns=['Appliances'])
# round to 3 decimal places
df = df.round(3)
def get_redundant_pairs(data):
'''Get diagonal and lower triangular pairs of correlation matrix'''
pairs_to_drop = set()
cols = data.columns
for i in range(0, data.shape[1]):
for j in range(0, i+1):
pairs_to_drop.add((cols[i], cols[j]))
return pairs_to_drop
Top Absolute Correlations
def get_top_abs_correlations(data, n=5):
au_corr = data.corr().abs().unstack()
labels_to_drop = get_redundant_pairs(data)
au_corr = au_corr.drop(labels=labels_to_drop).sort_values(ascending=False)
return au_corr[0:n]
print(get_top_abs_correlations(df, 40))
corr_matrix_transformed = df.corr()
corr_matrix_transformed
corr_matrix_transformed = corr_matrix_transformed[['log_Appliances']].copy()
corr_matrix_transformed.drop(index='log_Appliances', inplace=True)
corr_matrix_transformed.sort_values('log_Appliances', inplace=True)
Remove features whose absolute correlation with the target variable is below 10% (0.1).
features_to_keep = corr_matrix_transformed.loc[corr_matrix_transformed['log_Appliances'].abs() >= 0.1]
features_to_keep = features_to_keep.index.tolist()
features_to_keep
df = df[features_to_keep + ['log_Appliances']]
Pair plot of the selected features (those with more than 10% absolute correlation to the target) to understand their dynamics.
sns.set(style="ticks", color_codes=True)
sns.pairplot(df)
plt.show();
Next look at the statistics of the dataset:
df.describe().transpose()
Convert Time to Seconds
timestamp_s = date_time.map(datetime.datetime.timestamp)
Leverage sin and cos to convert the time to "Time of day" and "Time of year" signals, which gives the model access to the most important frequency features.
day = 24*60*60
year = (365.2425)*day
df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))
plt.plot(np.array(df['Day sin'])[:25])
plt.plot(np.array(df['Day cos'])[:25])
plt.xlabel('Time [h]')
plt.title('Time of day signal')
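A quick sanity check (a one-line sketch): every (sin, cos) pair lies on the unit circle, so together the two columns encode the phase within a period unambiguously.
# Each (Day sin, Day cos) pair should satisfy sin^2 + cos^2 = 1.
print(np.allclose(df['Day sin']**2 + df['Day cos']**2, 1.0))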
Check these assumptions by taking the tf.signal.rfft of log_Appliances over time.
fft = tf.signal.rfft(df['log_Appliances'])
f_per_dataset = np.arange(0, len(fft))
n_samples_h = len(df['log_Appliances'])
hours_per_year = 24*365.2425
years_per_dataset = n_samples_h/(hours_per_year)
f_per_year = f_per_dataset/years_per_dataset
plt.step(f_per_year, np.abs(fft))
plt.xscale('log')
plt.ylim(0, 1000)
plt.xlim([0.1, max(plt.xlim())])
plt.xticks([1, 365.2425], labels=['1/year', '1/day'])
_ = plt.xlabel('Frequency (log scale)')
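To read the dominant peaks off numerically rather than visually, a small sketch using the arrays computed above:
# Frequencies of the strongest FFT components, skipping the DC term at index 0.
peaks = np.argsort(np.abs(fft.numpy()))[::-1]
peaks = peaks[peaks != 0][:3]
print(f_per_year[peaks])  # expect energy near 1/day (~365 cycles/year)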
(80%, 10%, 10%) split for the training, validation, and test sets.
column_indices = {name: i for i, name in enumerate(df.columns)}
n = len(df)
train_df = df[0:int(n*0.8)]
val_df = df[int(n*0.8):int(n*0.9)]
test_df = df[int(n*0.9):]
num_features = df.shape[1]
df.info()
train_mean = train_df.mean()
train_std = train_df.std()
train_df = (train_df - train_mean) / train_std
val_df = (val_df - train_mean) / train_std
test_df = (test_df - train_mean) / train_std
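One caveat when reporting results (a sketch, where pred_norm stands in for a hypothetical normalized model output): predictions come out in normalized log space, so converting them to the original Wh units requires undoing both transforms.
pred_norm = 0.5  # hypothetical normalized prediction of log_Appliances
pred_log = pred_norm * train_std['log_Appliances'] + train_mean['log_Appliances']
print(np.exp(pred_log))  # back to the original Wh scale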
Now check the distribution of the features in the violin plot below. Though some features do have long tails, there are no obvious errors such as impossible values.
df_std = (df - train_mean) / train_std
df_std = df_std.melt(var_name='Column', value_name='Normalized')
plt.figure(figsize=(30, 10))
ax = sns.violinplot(x='Column', y='Normalized', data=df_std)
_ = ax.set_xticklabels(df.keys(), rotation=90)
Start by creating the WindowGenerator class. The windows it defines will be converted to tf.data.Dataset objects later.
class WindowGenerator():
def __init__(self, input_width, label_width, shift,
train_df=train_df, val_df=val_df, test_df=test_df,
label_columns=None):
# Store the raw data.
self.train_df = train_df
self.val_df = val_df
self.test_df = test_df
# Work out the label column indices.
self.label_columns = label_columns
if label_columns is not None:
self.label_columns_indices = {name: i for i, name in
enumerate(label_columns)}
self.column_indices = {name: i for i, name in
enumerate(train_df.columns)}
# Work out the window parameters.
self.input_width = input_width
self.label_width = label_width
self.shift = shift
self.total_window_size = input_width + shift
self.input_slice = slice(0, input_width)
self.input_indices = np.arange(self.total_window_size)[self.input_slice]
self.label_start = self.total_window_size - self.label_width
self.labels_slice = slice(self.label_start, None)
self.label_indices = np.arange(self.total_window_size)[self.labels_slice]
def __repr__(self):
return '\n'.join([
f'Total window size: {self.total_window_size}',
f'Input indices: {self.input_indices}',
f'Label indices: {self.label_indices}',
f'Label column name(s): {self.label_columns}'])
1. Create the Window
Two windows are created below: w1 predicts 1 hour, 24 hours into the future, given 24 hours of history; w2 predicts the next hour given the previous 6 hours.
w1 = WindowGenerator(input_width=24, label_width=1, shift=24,
label_columns=['log_Appliances'])
w1
w2 = WindowGenerator(input_width=6, label_width=1, shift=1,
label_columns=['log_Appliances'])
w2
2. Split the window into inputs and labels.
def split_window(self, features):
inputs = features[:, self.input_slice, :]
labels = features[:, self.labels_slice, :]
if self.label_columns is not None:
labels = tf.stack(
[labels[:, :, self.column_indices[name]] for name in self.label_columns],
axis=-1)
inputs.set_shape([None, self.input_width, None])
labels.set_shape([None, self.label_width, None])
return inputs, labels
WindowGenerator.split_window = split_window
Test the Window
# Stack three slices, the length of the total window:
example_window = tf.stack([np.array(train_df[:w2.total_window_size]),
np.array(train_df[100:100+w2.total_window_size]),
np.array(train_df[200:200+w2.total_window_size])])
example_inputs, example_labels = w2.split_window(example_window)
print('All shapes are: (batch, time, features)')
print(f'Window shape: {example_window.shape}')
print(f'Inputs shape: {example_inputs.shape}')
print(f'labels shape: {example_labels.shape}')
3. Methods for Plotting the Split Window
w2.example = example_inputs, example_labels
def plot(self, model=None, plot_col='log_Appliances', max_subplots=3):
inputs, labels = self.example
plt.figure(figsize=(12, 8))
plot_col_index = self.column_indices[plot_col]
max_n = min(max_subplots, len(inputs))
for n in range(max_n):
plt.subplot(max_n, 1, n+1)
plt.ylabel(f'{plot_col} [normed]')
plt.plot(self.input_indices, inputs[n, :, plot_col_index],
label='Inputs', marker='.', zorder=-10)
if self.label_columns:
label_col_index = self.label_columns_indices.get(plot_col, None)
else:
label_col_index = plot_col_index
if label_col_index is None:
continue
plt.scatter(self.label_indices, labels[n, :, label_col_index],
edgecolors='k', label='Labels', c='#2ca02c', s=64)
if model is not None:
predictions = model(inputs)
plt.scatter(self.label_indices, predictions[n, :, label_col_index],
marker='X', edgecolors='k', label='Predictions',
c='#ff7f0e', s=64)
if n == 0:
plt.legend()
plt.xlabel('Time [h]')
WindowGenerator.plot = plot
Plot the inputs and labels aligned within the split window.
w2.plot()
Plot T_out (the outside temperature).
# Split Window with T_out
w2.plot(plot_col='T_out')
4. Create tf.data.Datasets
def make_dataset(self, data):
data = np.array(data, dtype=np.float32)
ds = tf.keras.preprocessing.timeseries_dataset_from_array(
data=data,
targets=None,
sequence_length=self.total_window_size,
sequence_stride=1,
shuffle=True,
batch_size=32,)
ds = ds.map(self.split_window)
return ds
WindowGenerator.make_dataset = make_dataset
Add properties to the WindowGenerator for accessing the train, validation, and test data as tf.data.Datasets via the make_dataset method.
@property
def train(self):
return self.make_dataset(self.train_df)
@property
def val(self):
return self.make_dataset(self.val_df)
@property
def test(self):
return self.make_dataset(self.test_df)
@property
def example(self):
"""Get and cache an example batch of `inputs, labels` for plotting."""
result = getattr(self, '_example', None)
if result is None:
# No example batch was found, so get one from the `.train` dataset
result = next(iter(self.train))
# And cache it for next time
self._example = result
return result
WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.test = test
WindowGenerator.example = example
Now the WindowGenerator object gives access to tf.data.Dataset objects, making it easy to iterate over the data.
# Each element is an (inputs, label) pair
w2.train.element_spec
Iterating over the Dataset yields concrete batches:
for example_inputs, example_labels in w2.train.take(1):
print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
print(f'Labels shape (batch, time, features): {example_labels.shape}')
For the single-step models, the training data consists of hourly samples, and the models will learn to predict 1 hour into the future given 24 hours of the past.
First, define the baseline model, the evaluation windows, and a helper function for model compilation and training.
class Baseline(tf.keras.Model):
def __init__(self, label_index=None):
super().__init__()
self.label_index = label_index
def call(self, inputs):
if self.label_index is None:
return inputs
result = inputs[:, :, self.label_index]
return result[:, :, tf.newaxis]
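As a quick illustration (a toy sketch, not part of the pipeline), the baseline simply echoes its input, so with shift=1 its prediction for each step is the previously observed value:
# Toy batch: 1 sample, 3 time steps, 2 features; select feature 0, keeping dims.
toy = tf.constant([[[1., 10.], [2., 20.], [3., 30.]]])
print(Baseline(label_index=0)(toy))  # shape (1, 3, 1)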
wide_window = WindowGenerator(
input_width=24, label_width=24, shift=1,
label_columns=['log_Appliances'])
wide_window
MAX_EPOCHS = 20
def compile_and_fit(model, window, patience=2):
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
patience=patience,
mode='min')
model.compile(loss=tf.losses.MeanSquaredError(),
optimizer=tf.optimizers.Adam(),
metrics=[tf.metrics.MeanAbsoluteError()])
history = model.fit(window.train, epochs=MAX_EPOCHS,
validation_data=window.val,
callbacks=[early_stopping])
return history
single_step_window = WindowGenerator(
# `WindowGenerator` returns all features as labels if you
# don't set the `label_columns` argument.
input_width=1, label_width=1, shift=1)
wide_window = WindowGenerator(
input_width=24, label_width=24, shift=1)
for example_inputs, example_labels in wide_window.train.take(1):
print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
print(f'Labels shape (batch, time, features): {example_labels.shape}')
baseline = Baseline()
baseline.compile(loss=tf.losses.MeanSquaredError(),
metrics=[tf.metrics.MeanAbsoluteError()])
val_performance = {}
performance = {}
val_performance['Baseline'] = baseline.evaluate(wide_window.val)
performance['Baseline'] = baseline.evaluate(wide_window.test, verbose=0)
Visualize Performance Metrics: plot the baseline model's predictions; they turn out to be simply the labels shifted right by one hour.
wide_window.plot(baseline)
%%time
lstm_model = tf.keras.models.Sequential([
# Shape [batch, time, features] => [batch, time, lstm_units]
tf.keras.layers.LSTM(32, return_sequences=True),
# Shape => [batch, time, features]
tf.keras.layers.Dense(units=num_features)
])
history = compile_and_fit(lstm_model, wide_window)
IPython.display.clear_output()
val_performance['LSTM'] = lstm_model.evaluate(wide_window.val)
performance['LSTM'] = lstm_model.evaluate(wide_window.test, verbose=0)
print()
Visualize Performance Metrics
wide_window.plot(lstm_model)
%%time
GRU_model = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, time, gru_units]
tf.keras.layers.GRU(32, return_sequences=True),
# Shape => [batch, time, features]
tf.keras.layers.Dense(units=num_features)
])
history = compile_and_fit(GRU_model, wide_window)
IPython.display.clear_output()
val_performance['GRU'] = GRU_model.evaluate(wide_window.val)
performance['GRU'] = GRU_model.evaluate(wide_window.test, verbose=0)
print()
Visualize Performance Metrics.
wide_window.plot(GRU_model)
%%time
RNN_model = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, time, rnn_units]
tf.keras.layers.SimpleRNN(32, return_sequences=True),
# Shape => [batch, time, features]
tf.keras.layers.Dense(units=num_features)
])
history = compile_and_fit(RNN_model, wide_window)
IPython.display.clear_output()
val_performance['RNN'] = RNN_model.evaluate(wide_window.val)
performance['RNN'] = RNN_model.evaluate(wide_window.test, verbose=0)
print()
Visualize Performance Metrics.
wide_window.plot(RNN_model)
class ResidualWrapper(tf.keras.Model):
def __init__(self, model):
super().__init__()
self.model = model
def call(self, inputs, *args, **kwargs):
delta = self.model(inputs, *args, **kwargs)
# The prediction for each timestep is the input
# from the previous time step plus the delta
# calculated by the model.
return inputs + delta
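A toy check of the wrapper (a sketch, not part of the pipeline): wrapping a model that outputs all zeros reduces it to the identity, confirming the residual path.
# A Dense layer with zero weights and no bias always outputs zeros,
# so the wrapped model just returns its input.
zero_model = tf.keras.Sequential([
    tf.keras.layers.Dense(2, kernel_initializer='zeros', use_bias=False)])
print(ResidualWrapper(zero_model)(tf.ones((1, 3, 2))))  # equals the all-ones input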
%%time
residual_lstm = ResidualWrapper(
tf.keras.Sequential([
tf.keras.layers.LSTM(32, return_sequences=True),
tf.keras.layers.Dense(
num_features,
# The predicted deltas should start small
# So initialize the output layer with zeros
kernel_initializer=tf.initializers.zeros())
]))
history = compile_and_fit(residual_lstm, wide_window)
IPython.display.clear_output()
val_performance['Residual LSTM'] = residual_lstm.evaluate(wide_window.val)
performance['Residual LSTM'] = residual_lstm.evaluate(wide_window.test, verbose=0)
print()
Visualize Performance Metrics.
wide_window.plot(residual_lstm)
In the above plots of three examples, the single-step model is run over the course of 24 hours, predicting one hour into the future at each step. Now compare the overall performance of the single-step models:
x = np.arange(len(performance))
width = 0.3
metric_name = 'mean_absolute_error'
metric_index = lstm_model.metrics_names.index(metric_name)
val_mae = [v[metric_index] for v in val_performance.values()]
test_mae = [v[metric_index] for v in performance.values()]
plt.bar(x - 0.17, val_mae, width, label='Validation')
plt.bar(x + 0.17, test_mae, width, label='Test')
plt.xticks(ticks=x, labels=performance.keys(),
rotation=45)
plt.ylabel('MAE (average over all outputs)')
_ = plt.legend()
for name, value in performance.items():
print(f'{name:15s}: {value[1]:0.4f}')
For the multi-step model, the training data again consists of hourly samples. However, here, the models will learn to predict 24h of the future, given 24h of the past.
OUT_STEPS = 24
multi_window = WindowGenerator(input_width=24,
label_width=OUT_STEPS,
shift=OUT_STEPS)
multi_window.plot()
multi_window
class MultiStepLastBaseline(tf.keras.Model):
def call(self, inputs):
return tf.tile(inputs[:, -1:, :], [1, OUT_STEPS, 1])
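Here tf.tile repeats the last input time step OUT_STEPS times along the time axis; a toy illustration:
# (batch=1, time=3, features=2): keep only the last step and repeat it 4 times.
x = tf.reshape(tf.range(6, dtype=tf.float32), (1, 3, 2))
print(tf.tile(x[:, -1:, :], [1, 4, 1]).shape)  # (1, 4, 2)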
last_baseline = MultiStepLastBaseline()
last_baseline.compile(loss=tf.losses.MeanSquaredError(),
metrics=[tf.metrics.MeanAbsoluteError()])
multi_val_performance = {}
multi_performance = {}
multi_val_performance['Baseline Multi Step Last'] = last_baseline.evaluate(multi_window.val)
multi_performance['Baseline Multi Step Last'] = last_baseline.evaluate(multi_window.test, verbose=0)
Visualize Performance Metrics.
multi_window.plot(last_baseline)
class RepeatBaseline(tf.keras.Model):
def call(self, inputs):
return inputs
repeat_baseline = RepeatBaseline()
repeat_baseline.compile(loss=tf.losses.MeanSquaredError(),
metrics=[tf.metrics.MeanAbsoluteError()])
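Note that this baseline only lines up because input_width equals OUT_STEPS (both 24): it forecasts that the next day will simply repeat the previous one. A quick shape check (a sketch):
# Predictions and labels must share the (batch, 24, num_features) shape.
for inputs, labels in multi_window.train.take(1):
  print(repeat_baseline(inputs).shape, labels.shape)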
multi_val_performance['Baseline Repeat'] = repeat_baseline.evaluate(multi_window.val)
multi_performance['Baseline Repeat'] = repeat_baseline.evaluate(multi_window.test, verbose=0)
Visualize Performance Metrics.
multi_window.plot(repeat_baseline)
multi_lstm_model = tf.keras.Sequential([
# Shape [batch, time, features] => [batch, lstm_units]
# Adding more `lstm_units` just overfits more quickly.
tf.keras.layers.LSTM(32, return_sequences=False),
# Shape => [batch, out_steps*features]
tf.keras.layers.Dense(OUT_STEPS*num_features,
kernel_initializer=tf.initializers.zeros()),
# Shape => [batch, out_steps, features]
tf.keras.layers.Reshape([OUT_STEPS, num_features])
])
history = compile_and_fit(multi_lstm_model, multi_window)
IPython.display.clear_output()
multi_val_performance['LSTM'] = multi_lstm_model.evaluate(multi_window.val)
multi_performance['LSTM'] = multi_lstm_model.evaluate(multi_window.test, verbose=0)
Visualize Performance Metrics.
multi_window.plot(multi_lstm_model)
multi_GRU_model = tf.keras.Sequential([
    # Shape [batch, time, features] => [batch, gru_units]
    # Adding more `gru_units` just overfits more quickly.
tf.keras.layers.GRU(32, return_sequences=False),
# Shape => [batch, out_steps*features]
tf.keras.layers.Dense(OUT_STEPS*num_features,
kernel_initializer=tf.initializers.zeros()),
# Shape => [batch, out_steps, features]
tf.keras.layers.Reshape([OUT_STEPS, num_features])
])
history = compile_and_fit(multi_GRU_model, multi_window)
IPython.display.clear_output()
multi_val_performance['GRU'] = multi_GRU_model.evaluate(multi_window.val)
multi_performance['GRU'] = multi_GRU_model.evaluate(multi_window.test, verbose=0)
Visualize Performance Metrics.
multi_window.plot(multi_GRU_model)
multi_RNN_model = tf.keras.Sequential([
    # Shape [batch, time, features] => [batch, rnn_units]
    # Adding more `rnn_units` just overfits more quickly.
tf.keras.layers.SimpleRNN(32, return_sequences=False),
# Shape => [batch, out_steps*features]
tf.keras.layers.Dense(OUT_STEPS*num_features,
kernel_initializer=tf.initializers.zeros()),
# Shape => [batch, out_steps, features]
tf.keras.layers.Reshape([OUT_STEPS, num_features])
])
history = compile_and_fit(multi_RNN_model, multi_window)
IPython.display.clear_output()
multi_val_performance['RNN'] = multi_RNN_model.evaluate(multi_window.val)
multi_performance['RNN'] = multi_RNN_model.evaluate(multi_window.test, verbose=0)
Visualize Performance Metrics.
multi_window.plot(multi_RNN_model)
In the above plots of three examples, the multi-step model takes 24 hours of input and predicts the following 24 hours.
Next, build an autoregressive model: one that uses observations from previous time steps as input to predict the value at the next step, feeding each prediction back in as the input for the following step.
class FeedBack(tf.keras.Model):
def __init__(self, units, out_steps):
super().__init__()
self.out_steps = out_steps
self.units = units
self.lstm_cell = tf.keras.layers.LSTMCell(units)
# Also wrap the LSTMCell in an RNN to simplify the `warmup` method.
self.lstm_rnn = tf.keras.layers.RNN(self.lstm_cell, return_state=True)
self.dense = tf.keras.layers.Dense(num_features)
feedback_model = FeedBack(units=32, out_steps=OUT_STEPS)
def warmup(self, inputs):
# inputs.shape => (batch, time, features)
# x.shape => (batch, lstm_units)
x, *state = self.lstm_rnn(inputs)
# predictions.shape => (batch, features)
prediction = self.dense(x)
return prediction, state
FeedBack.warmup = warmup
This method returns a single time-step prediction, and the internal state of the LSTM:
prediction, state = feedback_model.warmup(multi_window.example[0])
prediction.shape
The call method below collects the predictions in a Python list, which works because out_steps is a fixed Python integer; for a dynamic output length you would need a tf.TensorArray and tf.range instead (see the sketch after the implementation).
def call(self, inputs, training=None):
  # Collect the unrolled predictions in a Python list.
predictions = []
# Initialize the lstm state
prediction, state = self.warmup(inputs)
# Insert the first prediction
predictions.append(prediction)
# Run the rest of the prediction steps
for n in range(1, self.out_steps):
# Use the last prediction as input.
x = prediction
# Execute one lstm step.
x, state = self.lstm_cell(x, states=state,
training=training)
# Convert the lstm output to a prediction.
prediction = self.dense(x)
# Add the prediction to the output
predictions.append(prediction)
# predictions.shape => (time, batch, features)
predictions = tf.stack(predictions)
# predictions.shape => (batch, time, features)
predictions = tf.transpose(predictions, [1, 0, 2])
return predictions
FeedBack.call = call
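For reference, here is how the same loop might look with tf.TensorArray and tf.range, which would also support a dynamic number of output steps (a hypothetical sketch, not wired into the model below):
def call_dynamic(self, inputs, training=None):
  prediction, state = self.warmup(inputs)
  # The TensorArray accumulates one (batch, features) slice per step.
  predictions = tf.TensorArray(tf.float32, size=self.out_steps)
  predictions = predictions.write(0, prediction)
  for n in tf.range(1, self.out_steps):
    x, state = self.lstm_cell(prediction, states=state, training=training)
    prediction = self.dense(x)
    predictions = predictions.write(n, prediction)
  # (time, batch, features) => (batch, time, features)
  return tf.transpose(predictions.stack(), [1, 0, 2])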
print('Output shape (batch, time, features): ', feedback_model(multi_window.example[0]).shape)
history = compile_and_fit(feedback_model, multi_window)
IPython.display.clear_output()
multi_val_performance['AR LSTM'] = feedback_model.evaluate(multi_window.val)
multi_performance['AR LSTM'] = feedback_model.evaluate(multi_window.test, verbose=0)
Visualize Performance Metrics.
multi_window.plot(feedback_model)
In the above plots of three examples, the autoregressive model takes 24 hours of input and predicts the following 24 hours. Now compare the performance of all the multi-step models:
x = np.arange(len(multi_performance))
width = 0.3
metric_name = 'mean_absolute_error'
metric_index = lstm_model.metrics_names.index(metric_name)
val_mae = [v[metric_index] for v in multi_val_performance.values()]
test_mae = [v[metric_index] for v in multi_performance.values()]
plt.bar(x - 0.17, val_mae, width, label='Validation')
plt.bar(x + 0.17, test_mae, width, label='Test')
plt.xticks(ticks=x, labels=multi_performance.keys(),
rotation=45)
plt.ylabel(f'MAE (average over all times and outputs)')
_ = plt.legend()
for name, value in multi_performance.items():
print(f'{name:8s}: {value[1]:0.4f}')